This notebook visualizes the SNP frequency (Col vs. Ler) in asy1 shattered hybrids. The SNP% shown is the percentage of Ler alleles. The SNP% is expected to differ, particularly around the centromeres, between chromosomes that arose from different non-disjunction events.
library("tidyverse")
library("ggplot2")
library("ggpubr")
# Load the 10 kb binned genotype table (tab-delimited text with a header row)
# and preview its first rows.
asy1.SNP.10kb <- read.delim(
  "/Users/wendy/Desktop/Lab/arabidopsis/binbygeno_asy1_10kb_20220310.txt"
)
head(asy1.SNP.10kb)
```r
# Load the 100 kb binned genotype table (tab-delimited) and preview its first
# rows.
asy1.SNP.100kb <- read.csv(
  "/Users/wendy/Desktop/Lab/arabidopsis/binbygeno_asy1_100kb_20220310.txt",
  sep = "\t"
)
head(asy1.SNP.100kb)
<!-- rnb-source-end -->
<!-- rnb-frame-begin eyJtZXRhZGF0YSI6eyJjbGFzc2VzIjoiZGF0YS5mcmFtZSIsIm5yb3ciOjYsIm5jb2wiOjEyLCJzdW1tYXJ5Ijp7IkRlc2NyaXB0aW9uIjoiZGYgWzYgw5cgMTJdIn19LCJyZGYiOiJINHNJQUFBQUFBQUFBNldTejA4VFFSVEgzN2E3eFc1YVVWRkFqVXBpREVTeUk0VkVsQk9ocUJjU0UzL0Uzc3l3WGFCaG1TWGJwUkREZ1FzbVhMejREK2gvNHYvaHhhUC9BVGZrTzlzM01HeDdzOG0zODk3N3pIdjdadDY4VzIwdCtDMmZpTXJrT3ZqM1lKTDM4Y1ByNEFXUlc0TGprRXRWdlI0QWo4R29RVGVnQ2dPM3VaVTIvc3NtcXVmMTlNLzUrb1BLOVdWeXYvK2h5cjBqR3ZrNWRjR0hzZXIwcjh0ODV4dk0zMmp4TSt3VDlIdEtkUFBZNHR0RTEvNFNWVnV3WDRKdllPK1N4VGZCUGtGZllDK0RINEt2V2YzTlF0T3NHZllEaTkrSHhxRmIwQ2cweHI3aFU2d0gwRVBvTWZ1RzY3dVk0OVhZenl5dS9hZlFFK2dScnpPRi9JQnpUUDVzWVpDZWtqdFJsNGRZTTBFTUl0a3h6dnRNcGhrNzVWZXF6ZWFkbm5nam8wd3FJYVZvaXNiY3ZHZ21QWWEzZTJLdHFhTml2aUhDTEkwdE5zNE1aT0Y1RHUzTWljR3liOWU3WXFXUWZGbllwcE5EU2hmNFFNOHlEaS80eEdEYk5yNDdyUFAraHF1M1drMlRmV0Z1Vm8ramRKUy9aejJXcTljZnhySnJydDhFL2JiTXBOaElrUS92ckpBeWt1eG1uVVFocWFRZmxGZElkdEpDWUhSUDZVN2FRYmkxcDdhRFJmMkJIUGRWNTdWaTJiWCtKMHYvdUpUSHBTcVIydXlveVBRZXkvVW9adWM2VHB3ZldPeW1IV1dlaTQ5b1YyUkpKczArUDB4aUU4blBSbWZuRnZIRnVuY0VBQUE9In0= -->
<div data-pagedtable="false">
<script data-pagedtable-source type="application/json">
{"columns":[{"label":[""],"name":["_rn_"],"type":[""],"align":["left"]},{"label":["Chrom"],"name":[1],"type":["chr"],"align":["left"]},{"label":["Start"],"name":[2],"type":["int"],"align":["right"]},{"label":["End"],"name":[3],"type":["int"],"align":["right"]},{"label":["v.Gaetan.aa.C.102.Cov"],"name":[4],"type":["int"],"align":["right"]},{"label":["v.LC.C.1.21.ctrl.Cov"],"name":[5],"type":["int"],"align":["right"]},{"label":["v.LC.C.21.36.ctrl2.Cov"],"name":[6],"type":["int"],"align":["right"]},{"label":["v.Gaetan.aa.C.102.Obs.B"],"name":[7],"type":["int"],"align":["right"]},{"label":["v.LC.C.1.21.ctrl.Obs.B"],"name":[8],"type":["int"],"align":["right"]},{"label":["v.LC.C.21.36.ctrl2.Obs.B"],"name":[9],"type":["int"],"align":["right"]},{"label":["v.Gaetan.aa.C.102.Calc.B"],"name":[10],"type":["int"],"align":["right"]},{"label":["v.LC.C.1.21.ctrl.Calc.B"],"name":[11],"type":["int"],"align":["right"]},{"label":["v.LC.C.21.36.ctrl2.Calc.B"],"name":[12],"type":["int"],"align":["right"]}],"data":[{"1":"Chr1","2":"0","3":"100000","4":"398","5":"363","6":"359","7":"43","8":"27","9":"32","10":"49","11":"48","12":"49","_rn_":"1"},{"1":"Chr1","2":"100000","3":"200000","4":"3547","5":"2280","6":"2391","7":"39","8":"22","9":"32","10":"48","11":"42","12":"45","_rn_":"2"},{"1":"Chr1","2":"200000","3":"300000","4":"3167","5":"2392","6":"2426","7":"39","8":"18","9":"29","10":"49","11":"37","12":"47","_rn_":"3"},{"1":"Chr1","2":"300000","3":"400000","4":"393","5":"313","6":"320","7":"40","8":"15","9":"30","10":"49","11":"31","12":"49","_rn_":"4"},{"1":"Chr1","2":"400000","3":"500000","4":"1015","5":"870","6":"892","7":"43","8":"19","9":"35","10":"48","11":"37","12":"48","_rn_":"5"},{"1":"Chr1","2":"500000","3":"600000","4":"4485","5":"3130","6":"3148","7":"45","8":"22","9":"32","10":"47","11":"40","12":"43","_rn_":"6"}],"options":{"columns":{"min":{},"max":[10],"total":[12]},"rows":{"min":[10],"max":[10],"total":[6]},"pages":{}}}
</script>
</div>
<!-- rnb-frame-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# take data in Chr1 #
## 10kb ##
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxucG9zLmNocjEuMTBrYiA8LSBhc3kxLlNOUC4xMGtiJFN0YXJ0W2FzeTEuU05QLjEwa2IkQ2hyb20gPT0gJ0NocjEnXVxubGVyLmNocjEuMTBrYiA8LSBhc3kxLlNOUC4xMGtiJHYuR2FldGFuLmFhLkMuMTAyLk9icy5CW2FzeTEuU05QLjEwa2IkQ2hyb20gPT0gJ0NocjEnXVxuY2hyMS4xMGtiIDwtIGRhdGEuZnJhbWUocG9zLmNocjEuMTBrYixsZXIuY2hyMS4xMGtiKVxuc25wLmNocjEuMTBrYiA8LSBsaXN0KHBvcy5jaHIxLjEwa2IpXG5gYGAifQ== -->
```r
# Pull the Chr1 rows out of the 10 kb table: bin start positions and the
# Obs.B column of sample Gaetan.aa.C.102 (plotted later as the Ler allele
# frequency).
is.chr1.10kb <- asy1.SNP.10kb$Chrom == "Chr1"
pos.chr1.10kb <- asy1.SNP.10kb$Start[is.chr1.10kb]
ler.chr1.10kb <- asy1.SNP.10kb$v.Gaetan.aa.C.102.Obs.B[is.chr1.10kb]
# Column names match the vector names because later aes() calls use them.
chr1.10kb <- data.frame(
  pos.chr1.10kb = pos.chr1.10kb,
  ler.chr1.10kb = ler.chr1.10kb
)
# One-element list holding the Chr1 bin starts; read back later via [[1]].
snp.chr1.10kb <- list(pos.chr1.10kb)
```r
# Pull the Chr1 rows out of the 100 kb table: bin start positions and the
# Obs.B column of sample Gaetan.aa.C.102.
is.chr1.100kb <- asy1.SNP.100kb$Chrom == "Chr1"
pos.chr1.100kb <- asy1.SNP.100kb$Start[is.chr1.100kb]
ler.chr1.100kb <- asy1.SNP.100kb$v.Gaetan.aa.C.102.Obs.B[is.chr1.100kb]
# Column names match the vector names so they can be used in aes() calls.
chr1.100kb <- data.frame(
  pos.chr1.100kb = pos.chr1.100kb,
  ler.chr1.100kb = ler.chr1.100kb
)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# SNP frequency plots #
## 10kb ##
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuc25wLjEwa2IgPC0gZ2dwbG90KGNocjEuMTBrYiwgYWVzKHBvcy5jaHIxLjEwa2IsbGVyLmNocjEuMTBrYikpICtcbiAgZ2VvbV9wb2ludChhbHBoYT0uNSkgK1xuICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gYyhcIiNFNjlGMDBcIikpICtcbiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoc2l6ZSA9IDgsIGFuZ2xlID0gOTApLCBheGlzLnRpdGxlLnggPSBlbGVtZW50X3RleHQoc2l6ZSA9IDEwLCBtYXJnaW4gPSBtYXJnaW4odD0xMCkpLCBheGlzLnRpdGxlLnkgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDgsIG1hcmdpbiA9IG1hcmdpbihyPTEwKSksIGxlZ2VuZC5wb3NpdGlvbiA9IFwiTm9uZVwiKSArIFxuICBzY2FsZV94X2NvbnRpbnVvdXMobmFtZT1cIkNocm9tb3NvbWUgMVwiLCBicmVha3MgPSBOVUxMKSArXG4gIHNjYWxlX3lfY29udGludW91cyhuYW1lPVwiTGVyLTEgYWxsZWwgRnJlcXVlbmN5XCIpXG5zbnAuMTBrYlxuYGBgIn0= -->
```r
# Scatter plot of the Ler allele frequency along Chr1 (10 kb bins).
# No colour aesthetic is mapped, so no colour scale is attached.
snp.10kb <- ggplot(chr1.10kb, aes(x = pos.chr1.10kb, y = ler.chr1.10kb)) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 10, margin = margin(t = 10)),
    axis.title.y = element_text(size = 8, margin = margin(r = 10)),
    legend.position = "none"
  ) +
  # breaks = NULL removes the x-axis breaks; only the axis title is shown.
  scale_x_continuous(name = "Chromosome 1", breaks = NULL) +
  scale_y_continuous(name = "Ler-1 allele Frequency")
snp.10kb
# The plot is passed explicitly so the saved figure does not depend on which
# plot happened to be created or printed last.
ggsave(
  "/Users/wendy/Desktop/Lab/arabidopsis/plots/asy1_snp_chr1_10kb_20220629.png",
  plot = snp.10kb, width = 6, height = 2
)
# Load the 10 kb per-sample table (tab-delimited with a header row) and keep
# the Chr1 bin starts together with the Gaetan_aa_C_102 / LC_C_1_21_ctrl
# column (plotted later as relative read coverage).
asy1.dos.10k <- read.delim(
  "/Users/wendy/Desktop/Lab/arabidopsis/binbysam-Gaetan-102-2ctrl-10kb-20210916.txt"
)
is.chr1.dos <- asy1.dos.10k$Chrom == "Chr1"
asy1.strt <- asy1.dos.10k$Strt[is.chr1.dos]
asy1.Cov <- asy1.dos.10k$Gaetan_aa_C_102.LC_C_1_21_ctrl[is.chr1.dos]
# Column names match the vector names because later code refers to them.
asy1.dos.Chr1 <- data.frame(asy1.strt = asy1.strt, asy1.Cov = asy1.Cov)
head(asy1.dos.Chr1)
# Keep the dosage bins whose start, shifted by -1, coincides with a Chr1 SNP
# bin start. Row order of the dosage table is preserved.
# Vectorised membership test instead of a nested loop that grows the result
# vectors one element at a time.
# NOTE(review): the -1 shift suggests the dosage table uses 1-based bin starts
# and the SNP table 0-based ones — confirm against the input files.
# NOTE(review): these vectors are later combined column-wise with
# ler.chr1.10kb, which assumes unique bin starts and the same bin order in
# both tables — confirm.
dos.chr1.bin.start <- asy1.dos.Chr1$asy1.strt - 1
dos.chr1.has.snp <- dos.chr1.bin.start %in% snp.chr1.10kb[[1]]
dos.chr1.strt.new <- dos.chr1.bin.start[dos.chr1.has.snp]
dos.chr1.cov.new <- asy1.dos.Chr1$asy1.Cov[dos.chr1.has.snp]
# Sanity check: compare the lengths of the matched dosage vectors with the
# number of Chr1 SNP bins. data.frame() further down combines the dosage
# vectors with ler.chr1.10kb column-wise, so the lengths need to agree.
length(dos.chr1.strt.new)
[1] 2771
length(dos.chr1.cov.new)
[1] 2771
length(snp.chr1.10kb[[1]])
[1] 2771
# Combine the matched dosage bins with the Chr1 Ler SNP values. The coverage
# column is coerced to numeric first.
dos.chr1.cov.new <- as.numeric(dos.chr1.cov.new)
snp.dos.chr1.10k <- data.frame(
  dos.chr1.strt.new = dos.chr1.strt.new,
  dos.chr1.cov.new = dos.chr1.cov.new,
  ler.chr1.10kb = ler.chr1.10kb
)
head(snp.dos.chr1.10k)
# Scatter plot of the Ler allele frequency along Chr1, drawn from the combined
# table. No colour aesthetic is mapped, so no colour scale is attached.
snp.10kb <- ggplot(
  snp.dos.chr1.10k,
  aes(x = dos.chr1.strt.new, y = ler.chr1.10kb)
) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 10, margin = margin(t = 10)),
    axis.title.y = element_text(size = 8, margin = margin(r = 10)),
    legend.position = "none"
  ) +
  scale_x_continuous(name = "Chromosome 1", breaks = NULL) +
  scale_y_continuous(name = "Ler-1 allele Frequency")
snp.10kb
# The plot is passed explicitly so the saved figure does not depend on which
# plot happened to be created or printed last.
ggsave(
  "/Users/wendy/Desktop/Lab/arabidopsis/plots/asy1_snp_chr1_10kb_20220629.png",
  plot = snp.10kb, width = 6, height = 1.5
)
# Scatter plot of relative read coverage along Chr1 for the bins that have a
# SNP value.
dos.10kb <- ggplot(
  snp.dos.chr1.10k,
  aes(x = dos.chr1.strt.new, y = dos.chr1.cov.new)
) +
  geom_point(alpha = 0.5, color = "navy") +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 10, margin = margin(t = 10)),
    axis.title.y = element_text(size = 8, margin = margin(r = 10)),
    legend.position = "none"
  ) +
  scale_x_continuous(name = "", breaks = NULL) +
  # Points outside the y limits (or NA) are dropped with a warning.
  scale_y_continuous(name = "Relative read coverage", limits = c(1, 4))
dos.10kb
Warning: Removed 18 rows containing missing values (geom_point).
# Save the dosage plot. The plot is passed explicitly so the output does not
# depend on which plot was created or printed last.
ggsave(
  "/Users/wendy/Desktop/Lab/arabidopsis/plots/asy1_dos_chr1_10kb_20220629.png",
  plot = dos.10kb, width = 6, height = 1.5
)
Warning: Removed 18 rows containing missing values (geom_point).
# Subset of the combined table restricted to bins starting between 12 Mb and
# 17 Mb (both bounds inclusive); comma-separated conditions are ANDed.
snp.dos.chr1.10k.1217 <- filter(
  snp.dos.chr1.10k,
  dos.chr1.strt.new >= 12000000,
  dos.chr1.strt.new <= 17000000
)
snp.dos.chr1.10k.1217
```r
# Export the combined Chr1 SNP/dosage table as tab-separated text, including
# row names and a header line.
write.table(
  snp.dos.chr1.10k,
  file = "/Users/wendy/Desktop/Lab/arabidopsis/snp_dos_chr1_10kb_20220315.txt",
  sep = "\t", row.names = TRUE, col.names = TRUE
)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
# plot snp #
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuc25wLjEwa2IgPC0gZ2dwbG90KHNucC5kb3MuY2hyMS4xMGsuMTIxNywgYWVzKGRvcy5jaHIxLnN0cnQubmV3LGxlci5jaHIxLjEwa2IpKSArXG4gIGdlb21fcG9pbnQoYWxwaGE9LjUpICtcbiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoXCIjRTY5RjAwXCIpKSArXG4gIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KHNpemUgPSA4LCBhbmdsZSA9IDkwKSwgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxMCwgbWFyZ2luID0gbWFyZ2luKHQ9MTApKSwgYXhpcy50aXRsZS55ID0gZWxlbWVudF90ZXh0KHNpemUgPSA4LCBtYXJnaW4gPSBtYXJnaW4ocj0xMCkpLCBsZWdlbmQucG9zaXRpb24gPSBcIk5vbmVcIikgKyBcbiAgc2NhbGVfeF9jb250aW51b3VzKG5hbWU9XCJDaHJvbW9zb21lIDE6IDEwTWIgdG8gMTZNYlwiLCBicmVha3MgPSBOVUxMKSArXG4gIHNjYWxlX3lfY29udGludW91cyhuYW1lPVwiTGVyLTEgYWxsZWwgRnJlcXVlbmN5XCIpXG5zbnAuMTBrYlxuYGBgIn0= -->
```r
# Scatter plot of the Ler allele frequency for the 12-17 Mb subset of Chr1.
# The x-axis title states the 12-17 Mb window that the subset table covers.
# No colour aesthetic is mapped, so no colour scale is attached.
snp.10kb <- ggplot(
  snp.dos.chr1.10k.1217,
  aes(x = dos.chr1.strt.new, y = ler.chr1.10kb)
) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 10, margin = margin(t = 10)),
    axis.title.y = element_text(size = 8, margin = margin(r = 10)),
    legend.position = "none"
  ) +
  scale_x_continuous(name = "Chromosome 1: 12Mb to 17Mb", breaks = NULL) +
  scale_y_continuous(name = "Ler-1 allele Frequency")
snp.10kb
# The plot is passed explicitly so the saved figure does not depend on which
# plot happened to be created or printed last.
ggsave(
  "/Users/wendy/Desktop/Lab/arabidopsis/plots/asy1_snp_chr1_1217_20220629.png",
  plot = snp.10kb, width = 6, height = 1.5
)
# Scatter plot of relative read coverage for the 12-17 Mb subset of Chr1.
dos.10kb <- ggplot(
  snp.dos.chr1.10k.1217,
  aes(x = dos.chr1.strt.new, y = dos.chr1.cov.new)
) +
  geom_point(alpha = 0.5, color = "navy") +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 10, margin = margin(t = 10)),
    axis.title.y = element_text(size = 8, margin = margin(r = 10)),
    legend.position = "none"
  ) +
  scale_x_continuous(name = "", breaks = NULL) +
  # Points outside the y limits (or NA) are dropped with a warning.
  scale_y_continuous(name = "Relative read coverage", limits = c(1, 4))
dos.10kb
Warning: Removed 7 rows containing missing values (geom_point).
# Save the zoomed dosage plot. The plot is passed explicitly so the output
# does not depend on which plot was created or printed last.
ggsave(
  "/Users/wendy/Desktop/Lab/arabidopsis/plots/asy1_dos_chr1_1217_20220629.png",
  plot = dos.10kb, width = 6, height = 1.5
)
Warning: Removed 7 rows containing missing values (geom_point).
# Overlay relative read coverage (orange, left axis) and Ler SNP frequency
# (navy, right axis) along Chr1. The SNP values are divided by 25 to share the
# coverage scale, and the secondary axis multiplies by 25 to undo that.
dos.snp.10k <- ggplot(snp.dos.chr1.10k, aes(x = dos.chr1.strt.new)) +
  geom_point(aes(y = dos.chr1.cov.new), alpha = 0.6, color = "orange") +
  geom_point(aes(y = ler.chr1.10kb / 25), alpha = 0.5, color = "navy") +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 14, margin = margin(t = 10)),
    axis.title.y = element_text(size = 14, margin = margin(r = 10))
  ) +
  scale_x_continuous(name = "Chromosome 1", breaks = NULL) +
  scale_y_continuous(
    name = "Relative Read Coverage",
    # The transformation is passed positionally: the first argument is named
    # `trans` in older ggplot2 releases and `transform` from 3.5.0 on.
    sec.axis = sec_axis(~ . * 25, name = "Ler SNP Frequency")
  )
dos.snp.10k
# The plot is passed explicitly so the saved figure does not depend on which
# plot happened to be created or printed last.
ggsave(
  "/Users/wendy/Desktop/manuscript-ASY1/snp_dos_chr1_10kb_20220622.png",
  plot = dos.snp.10k, height = 4, width = 8
)
# Scatter plot of relative read coverage for every Chr1 bin in the dosage
# table (not only the bins matched to a SNP value).
# asy1.Cov is coerced to numeric inside aes(); values that cannot be parsed
# become NA and are dropped with a warning.
dos.10kb <- ggplot(
  asy1.dos.Chr1,
  aes(x = asy1.strt, y = as.numeric(asy1.Cov))
) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(size = 8, angle = 90),
    axis.title.x = element_text(size = 10, margin = margin(t = 10)),
    axis.title.y = element_text(size = 8, margin = margin(r = 10)),
    legend.position = "none"
  ) +
  scale_x_continuous(name = "", breaks = NULL) +
  # Points outside the y limits are dropped as well.
  scale_y_continuous(name = "Relative read coverage", limits = c(1, 4))
dos.10kb
Warning in FUN(X[[i]], ...) : NAs introduced by coercion
Warning: Removed 31 rows containing missing values (geom_point).
# Save the whole-Chr1 dosage plot. The plot is passed explicitly so the output
# does not depend on which plot was created or printed last.
ggsave(
  "/Users/wendy/Desktop/Lab/arabidopsis/plots/dosage-chr1-20220629.png",
  plot = dos.10kb, width = 6, height = 1.5
)
Warning in FUN(X[[i]], ...) : NAs introduced by coercion
Warning: Removed 31 rows containing missing values (geom_point).
# Stack the dosage panel above the SNP panel in a single column.
# NOTE(review): when the file is run top to bottom, dos.10kb at this point is
# built from asy1.dos.Chr1 (all Chr1 bins) while snp.10kb was last assigned
# from snp.dos.chr1.10k.1217 (the 12-17 Mb subset), so the two panels may not
# cover the same x range — confirm this pairing is intended.
onepage <- ggarrange(dos.10kb, snp.10kb, ncol = 1, nrow = 2)
Warning in FUN(X[[i]], ...) : NAs introduced by coercion
Warning: Removed 31 rows containing missing values (geom_point).
# Display the combined page, then save it. The plot is passed explicitly so
# the saved figure is the combined page even if it has not just been printed
# (e.g. when the code is run with source()).
onepage
ggsave(
  "/Users/wendy/Desktop/manuscript-ASY1/asy1_snp_10kb_20220622.png",
  plot = onepage, width = 6, height = 3
)